# Prevent printing of warnings and such in the HTML
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

library(tidyverse)
library(multitaper)
library(GenomicAlignments)
library(parallel)

This is to be done using the multitaper package https://github.com/krahim/multitaper/.

Normalize the data

bam.locs <- dir("../../alignment/hisat2/output", pattern = ".bam", recursive = TRUE, full.names = TRUE)

names(bam.locs) <- bam.locs %>%
  str_split(pattern = "/") %>%
  map(function(x){x[[6]]}) %>%
  str_remove(".bam") %>%
  str_replace("AraR", "REL60")

Read the bams in, only keeping + strand reads and excluding ERCCs and tRNAs.

bam.list <- mclapply(bam.locs, function(x){
  b1 <- readGAlignments(x)
  
  b2 <- b1[strand(b1) == "+" & grepl("ECB_", seqnames(b1))]
  
  return(b2)
}, mc.cores = 40)

Calculate periodicity, this seems to want to return a graph no matter what.

df.list <- lapply(bam.list, function(x){
  # count number of reads at each end point
  t1 <- table(factor(end(x), levels = 15:150))
  
  # apply spec function
  t2 <- spec.pgram(as.ts(t1))
  
  # get a df of graphable parts, rescale the spec height to 0-1 so it's comparable across samples
  t3 <- tibble(period = 1/t2$freq,
               spec = t2$spec)

  # return said df
  return(t3)
})

Bind all those together

df1 <- bind_rows(df.list, .id = "sample") %>%
  separate(sample, into = c("rep", "seqtype", "line"), sep = "-") %>% 
  filter(is.finite(period) & period <= 4) # only retain a small range

df1$line <- gsub("M", "-", df1$line) %>%
  gsub("P", "+", .) %>% 
  gsub("AraR", "REL0", .)

# save it
write_csv(df1, "../../data_frames/table_s2_three_nt_periodicity.csv")
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] GenomicAlignments_1.24.0    Rsamtools_2.4.0            
##  [3] Biostrings_2.56.0           XVector_0.28.0             
##  [5] SummarizedExperiment_1.18.1 DelayedArray_0.14.0        
##  [7] matrixStats_0.56.0          Biobase_2.48.0             
##  [9] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [11] IRanges_2.22.2              S4Vectors_0.26.1           
## [13] BiocGenerics_0.34.0         multitaper_1.0-14          
## [15] forcats_0.5.0               stringr_1.4.0              
## [17] dplyr_1.0.1                 purrr_0.3.4                
## [19] readr_1.3.1                 tidyr_1.1.0                
## [21] tibble_3.0.3                ggplot2_3.3.2              
## [23] tidyverse_1.3.0            
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.1             jsonlite_1.7.0         modelr_0.1.8          
##  [4] assertthat_0.2.1       blob_1.2.1             GenomeInfoDbData_1.2.3
##  [7] cellranger_1.1.0       yaml_2.2.1             pillar_1.4.6          
## [10] backports_1.1.8        lattice_0.20-41        glue_1.4.1            
## [13] digest_0.6.25          rvest_0.3.5            colorspace_1.4-1      
## [16] htmltools_0.5.0        Matrix_1.2-18          pkgconfig_2.0.3       
## [19] broom_0.5.6            haven_2.3.1            zlibbioc_1.34.0       
## [22] scales_1.1.1           BiocParallel_1.22.0    generics_0.0.2        
## [25] ellipsis_0.3.1         withr_2.2.0            cli_2.0.2             
## [28] magrittr_1.5           crayon_1.3.4           readxl_1.3.1          
## [31] evaluate_0.14          fs_1.4.2               fansi_0.4.1           
## [34] nlme_3.1-149           xml2_1.3.2             tools_4.0.3           
## [37] hms_0.5.3              lifecycle_0.2.0        munsell_0.5.0         
## [40] reprex_0.3.0           compiler_4.0.3         rlang_0.4.7           
## [43] grid_4.0.3             RCurl_1.98-1.2         rstudioapi_0.11       
## [46] bitops_1.0-6           rmarkdown_2.3          gtable_0.3.0          
## [49] DBI_1.1.0              R6_2.4.1               lubridate_1.7.9       
## [52] knitr_1.29             stringi_1.4.6          Rcpp_1.0.5            
## [55] vctrs_0.3.2            dbplyr_1.4.4           tidyselect_1.1.0      
## [58] xfun_0.16